/*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.i18n.phonenumbers.metadata.finitestatematcher;

import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode.State;

/**
 * Matches phone number regular expressions based on compact compiled data generated by
 * {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler
 * MatcherCompiler}. Typically the phone number regular expression will be compiled at build time
 * and the resulting matcher data will be packaged into the binary which needs it, or downloaded at
 * run time.
 * <p>
 * This class is designed to be lightweight and fast, and should be simple to implement in many
 * different languages (C++, Python, JS, etc.).
 *
 * TODO: Consider UnisgnedBytes.toInt(x) to avoid lots of (x & 0xFF).
 */
public abstract class DigitSequenceMatcher {

  /** Possible result types returned by a match operation. */
  public enum Result {
    /** The match operation was a success and the input was matched. */
    MATCHED,
    /** The match operation failed because unexpected input was encountered. */
    INVALID,
    /**
     * The match operation failed because the input terminated too soon (ie, the input was a
     * valid prefix for the matcher).
     */
    TOO_SHORT,
    /**
     * The match operation failed due to the existence of additional input after matching had
     * completed (ie, the the input would have matched if it were shorter).
     */
    TOO_LONG;
  }

  /** An iterator of {@code int}, used to supply the matcher with a sequence of input digits. */
  public interface DigitSequence {
    /** Returns true if there are more digits available. */
    boolean hasNext();

    /**
     * Return the next digit value (from 0 to 9 inclusive, not a char value). The matcher does not
     * test for invalid digits, so returning values outside this range will have undefined results,
     * including false positive results.
     */
    int next();
  }

  /** Internal abstraction to allow matching over either byte arrays or strings. */
  interface DataView {
    /** Return the unsigned byte value at the given offset from the current position. */
    int peekByte(int offset);

    /** Return the unsigned byte value at the current position and move ahead 1 byte. */
    int readByte();

    /** Return the unsigned short value at the current position and move ahead 2 bytes. */
    int readShort();

    /** Return the unsigned int value at the current position and move ahead 4 bytes. */
    int readInt();

    /** Adjust the current position by the given (non-negative) offset. */
    State branch(int offset);

    /**
     * Adjust the current position by the unsigned byte offset value read from the current
     * position plus the given index. This is used to implement maps and branching ranges.
     */
    State jumpTable(int index);
  }

  /**
   * Creates a new matcher which reads instructions directly from the given byte array. Typically
   * it is expected that this method will consume byte arrays packaged into a binary at build time
   * (the MatcherCompiler is not suitable for direct parsing of regular expressions at run time).
   * <p>
   * See {@code MatcherCompiler.compile(...)}.
   */
  public static DigitSequenceMatcher create(byte[] data) {
    if (data.length == 0) {
      throw new IllegalArgumentException("matcher data cannot be empty");
    }
    return new ByteArrayMatcher(data);
  }

  /**
   * Creates a new matcher which reads instructions from the given string. Typically it is expected
   * that this method will be used when matcher data is packaged as literal Java string constants
   * in (auto-generated) source files.
   * <p>
   * See {@code MatcherCompiler.compileToUnquotedJavaSourceString(...)}.
   */
  public static DigitSequenceMatcher create(String data) {
    if (data.isEmpty()) {
      throw new IllegalArgumentException("matcher data cannot be empty");
    }
    return new StringMatcher(data);
  }

  abstract DataView newDataView();

  abstract int size();

  /** Matches the input against this matcher, returning a result code. */
  public Result match(DigitSequence in) {
    State state = runMatcher(in);
    switch (state) {
      case TERMINAL:
        return !in.hasNext() ? Result.MATCHED : Result.TOO_LONG;
      case TRUNCATED:
        return Result.TOO_SHORT;
      case INVALID:
        return Result.INVALID;
      default:
        throw new AssertionError("unexpected state: " + state);
    }
  }

  private State runMatcher(DigitSequence in) {
    DataView data = newDataView();
    State state;
    do {
      state = OpCode.decode(data.peekByte(0)).execute(data, in);
    } while (state == State.CONTINUE);
    return state;
  }

  @Override
  public String toString() {
    int size = size();
    StringBuilder out = new StringBuilder(size + " :: [ ");
    DataView data = newDataView();
    while (size-- > 0) {
      out.append(Integer.toHexString(data.readByte())).append(", ");
    }
    out.setLength(out.length() - 2);
    out.append(" ]");
    return out.toString();
  }

  /** A matcher for reading instructions from a byte array. */
  private static final class ByteArrayMatcher extends DigitSequenceMatcher {

    private class ByteArrayData implements DataView {
      int position = 0;

      @Override public int peekByte(int offset) {
        return bytes[position + offset] & 0xFF;
      }

      @Override public int readByte() {
        return bytes[position++] & 0xFF;
      }

      @Override public int readShort() {
        return (readByte() << 8) | readByte();
      }

      @Override public int readInt() {
        return (readShort() << 16) | readShort();
      }

      @Override public State branch(int offset) {
        position += offset;
        return offset != 0 ? State.CONTINUE : State.TERMINAL;
      }

      @Override public State jumpTable(int index) {
        return branch(peekByte(index));
      }
    }

    private final byte[] bytes;

    private ByteArrayMatcher(byte[] data) {
      this.bytes = data;
    }

    @Override
    DataView newDataView() {
      return new ByteArrayData();
    }

    @Override
    int size() {
      return bytes.length;
    }
  }

  /** A matcher for reading instructions from a String. */
  private static final class StringMatcher extends DigitSequenceMatcher {

    /*
     * Note: Using unsigned shift "x >>> 1" is more likely to be free as part of a data load
     * instruction than "x / 2".
     */

    private class StringData implements DataView {
      int position = 0;

      @Override public int peekByte(int offset) {
        offset += position;
        int data = bytes.charAt(offset >>> 1);
        // char := hi [ even-byte | odd-byte  ] lo
        return (offset & 1) != 0 ? data & 0xFF : data >>> 8;
      }

      @Override public int readByte() {
        int data = bytes.charAt(position >>> 1);
        // char := hi [ even-byte | odd-byte  ] lo
        data = (position & 1) != 0 ? data & 0xFF : data >>> 8;
        position += 1;
        return data;
      }

      @Override public int readShort() {
        int data = bytes.charAt(position >>> 1);
        // Adding 2 early does not affect odd/even (but does reference next char).
        position += 2;
        if ((position & 1) != 0) {
          data = ((data & 0xFF) << 8) | (bytes.charAt(position >>> 1) >>> 8);
        }
        return data;
      }

      @Override public int readInt() {
        return (readShort() << 16) | readShort();
      }

      @Override public State branch(int offset) {
        position += offset;
        return offset != 0 ? State.CONTINUE : State.TERMINAL;
      }

      @Override public State jumpTable(int index) {
        return branch(peekByte(index));
      }
    }

    private final String bytes;

    private StringMatcher(String bytes) {
      this.bytes = bytes;
    }

    @Override
    DataView newDataView() {
      return new StringData();
    }

    @Override
    int size() {
      int size = 2 * bytes.length();
      if ((bytes.charAt(bytes.length() - 1) & 0xFF) == 0xFF) {
        size -= 1;
      }
      return size;
    }
  }

  /** An iterator of {@code int} that yields a sequence of input digits from a string. */
  private static final class StringDigits implements DigitSequence {
    private final CharSequence number;
    private int n = 0;

    private StringDigits(CharSequence number) {
      this.number = number;
    }

    @Override public int next() {
      if (n < 0 || n >= number.length()) {
        throw new IndexOutOfBoundsException(
            "index '" + n + "' out of bounds for input: " + number);
      }
      char c = number.charAt(n);
      if (c < '0' || c > '9') {
        throw new IllegalArgumentException(
            "non-digit character '" + c + "' [" + ((int) c) + "] at index " + n + " in: " + number);
      }
      n++;
      return c - '0';
    }

    @Override public boolean hasNext() {
      return n < number.length();
    }
  }

  /**
   * Returns an instance of DigitSequence based on the input string. The input string may only
   * contain digits.
   */
  public static DigitSequence digitsFromString(CharSequence number) {
    return new StringDigits(number);
  }

  /** A matcher has no internal state and is just a factory for data specific implementations. */
  private DigitSequenceMatcher() { }
}
